package com.ggz.es.util;

import com.ggz.es.model.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Scrapes product listings from the JD.com search results page using Jsoup.
 *
 * <p>For a given keyword the search page is fetched and each {@code <li>} under the
 * element with id {@code J_goodsList} is converted into a {@link Content} holding the
 * product title, image URL and price text.
 *
 * @version 1.0.0
 * @author goodAsWater
 * @date 2020-04-15 00:18
 */
@Component
public class HtmlParseUtil {

    /** Base search URL; the URL-encoded keyword is appended directly after it. */
    public static final String PRIFIX_URL = "https://search.jd.com/Search?keyword=";

    /** Timeout, in milliseconds, handed to Jsoup when fetching the search page. */
    private static final int TIMEOUT_MILLIS = 3000;

    /**
     * Manual smoke test: searches for "redis" and prints every parsed item.
     *
     * @param args unused
     * @throws Exception if fetching or parsing the search page fails
     */
    public static void main(String[] args) throws Exception {
        new HtmlParseUtil().parserByKeywords("redis").forEach(System.out::println);
    }

    /**
     * Fetches the search results page for the given keyword and extracts the listed products.
     *
     * @param keywords the search keyword; it is URL-encoded as UTF-8 before being placed in
     *                 the query string, so non-ASCII text and reserved characters are safe
     * @return one {@link Content} per {@code <li>} found inside the {@code J_goodsList}
     *         element; an empty list when the page does not contain that element
     * @throws Exception if the URL cannot be built or the page cannot be fetched or parsed
     */
    public List<Content> parserByKeywords(String keywords) throws Exception {
        List<Content> goodsList = new ArrayList<>();

        // Encode the keyword exactly once and put it in the "keyword" parameter itself;
        // appending it after "&enc=utf-8" would corrupt the query string.
        String url = PRIFIX_URL + URLEncoder.encode(keywords, "UTF-8") + "&enc=utf-8";

        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);
        Element container = document.getElementById("J_goodsList");
        if (container == null) {
            // The result container is missing (e.g. a different page was served):
            // report "no results" instead of failing with a NullPointerException.
            return goodsList;
        }

        // Each <li> inside the container represents one product entry.
        for (Element item : container.getElementsByTag("li")) {
            // Images are lazy-loaded, so the URL is read from the lazy-load attribute.
            String img = item.getElementsByTag("img").eq(0).attr("source-data-lazy-img");
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();

            goodsList.add(new Content(title, img, price));
        }
        return goodsList;
    }

}
